#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Import the required libraries
import os
import pandas as pd
import numpy as np
import random

import import_ipynb
import SelectIndices as si

# Seed Python's built-in `random` module so that any draws from it are
# repeatable across runs.
# NOTE(review): this does not seed NumPy's global RNG; confirm whether
# SelectIndices draws from numpy.random and needs its own seed.
random.seed(10012)


# In[2]:


# Dataset identifiers; each one is the prefix of a CSV file name under
# data/output/.
dataset_names = [
    'eo',
    'stwts',
]

# Embedding identifiers; each one is combined with a dataset identifier to
# form "<dataset>_<embedding>_full.csv".
embed_types = [
    'cvec_pca16',
    'cvec_nmf16',
    'cvec_umap16',
    'cvec_tsne16',
    'lda100',
    'bert',
    'roberta',
    'distil',
    'glove6B',
    'universal',
]


# In[3]:


# For every (dataset, embedding) pair: load the "<dataset>_<embedding>_full.csv"
# file, fit normal parameters with si.fit_norm, and collect whatever
# si.get_kld_matrix returns in saved_list.
# NOTE(review): "kld" is presumably Kullback-Leibler divergence, and
# mu_list / sd_list presumably hold fitted means and standard deviations --
# confirm against SelectIndices.
saved_list = []
for dataset_name in dataset_names:
    for embed_type in embed_types:
        file_name = f"{dataset_name}_{embed_type}_full.csv"
        # index_col=0: the first CSV column becomes the row index, so it is
        # excluded from the array produced by to_numpy().
        data = pd.read_csv("data/output/" + file_name, index_col=0).to_numpy()
        print(file_name)
        mu_list, sd_list = si.fit_norm(data)
        print('Creating kld matrix...')
        saved_list.append(
            si.get_kld_matrix(mu_list, sd_list, dataset_name, embed_type)
        )
print()
# Bare expression: shows the list when run as a notebook cell; it has no
# effect when the file is executed as a plain script.
saved_list


# In[4]:


# For every (dataset, embedding) pair: load the "<dataset>_<embedding>_full.csv"
# file and collect whatever si.get_ks_matrix returns in saved_list.
# NOTE(review): "ks" is presumably the Kolmogorov-Smirnov statistic --
# confirm against SelectIndices.
saved_list = []
for dataset_name in dataset_names:
    for embed_type in embed_types:
        file_name = f"{dataset_name}_{embed_type}_full.csv"
        # index_col=0: the first CSV column becomes the row index, so it is
        # excluded from the array produced by to_numpy().
        data = pd.read_csv("data/output/" + file_name, index_col=0).to_numpy()
        print(file_name)
        print('Creating ks matrix...')
        saved_list.append(si.get_ks_matrix(data, dataset_name, embed_type))
print()
# Bare expression: shows the list when run as a notebook cell; it has no
# effect when the file is executed as a plain script.
saved_list


# In[5]:


# For every (dataset, embedding) pair: load the "<dataset>_<embedding>_full.csv"
# file and collect whatever si.get_cos_matrix returns in saved_list.
# NOTE(review): "cos" is presumably cosine similarity/distance -- confirm
# against SelectIndices.
saved_list = []
for dataset_name in dataset_names:
    for embed_type in embed_types:
        file_name = f"{dataset_name}_{embed_type}_full.csv"
        # index_col=0: the first CSV column becomes the row index, so it is
        # excluded from the array produced by to_numpy().
        data = pd.read_csv("data/output/" + file_name, index_col=0).to_numpy()
        print(file_name)
        print('Creating cos matrix...')
        saved_list.append(si.get_cos_matrix(data, dataset_name, embed_type))
print()
# Bare expression: shows the list when run as a notebook cell; it has no
# effect when the file is executed as a plain script.
saved_list

